{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2.0.0\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.model_selection import StratifiedKFold\n",
    "from tqdm import tqdm\n",
    "import tensorflow as tf\n",
    "import tensorflow.keras.backend as K\n",
    "from keras.utils import to_categorical\n",
    "import os\n",
    "from transformers import *\n",
    "print(tf.__version__)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#https://huggingface.co/bert-base-chinese 去这里下载预训练参数"
   ]
  },
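  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sketch (not run here): instead of downloading the vocab/config/weights by\n",
    "# hand, transformers can fetch the same bert-base-chinese files by model name.\n",
    "# This assumes internet access and disk space for the TF checkpoint; the rest\n",
    "# of this notebook loads the files from local paths instead.\n",
    "# tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')\n",
    "# bert_model = TFBertModel.from_pretrained('bert-base-chinese')"
   ]
  },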
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "train shape = (99913, 7)\n",
      "test shape = (10000, 6)\n"
     ]
    }
   ],
   "source": [
    "PATH = './'\n",
    "BERT_PATH = './'\n",
    "MAX_SEQUENCE_LENGTH = 140\n",
    "input_categories = '微博中文内容'\n",
    "output_categories = '情感倾向'\n",
    "\n",
    "df_train = pd.read_csv(PATH+'nCoV_100k_train.labled.csv',engine ='python')\n",
    "df_train = df_train[df_train[output_categories].isin(['-1','0','1'])]\n",
    "df_test = pd.read_csv(PATH+'nCov_10k_test.csv',engine ='python')\n",
    "df_sub = pd.read_csv(PATH+'submit_example.csv')\n",
    "print('train shape =', df_train.shape)\n",
    "print('test shape =', df_test.shape)"
   ]
  },
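  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Illustrative check (uses only objects defined above): after the filter the\n",
    "# label column holds -1 (negative), 0 (neutral) and 1 (positive), so the class\n",
    "# balance is worth a quick look before training.\n",
    "print(df_train[output_categories].value_counts())"
   ]
  },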
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _convert_to_transformer_inputs(instance, tokenizer, max_sequence_length):\n",
    "    \"\"\"Converts tokenized input to ids, masks and segments for transformer (including bert)\"\"\"\n",
    "    \n",
    "    def return_id(str1, truncation_strategy, length):\n",
    "\n",
    "        inputs = tokenizer.encode_plus(str1,\n",
    "            add_special_tokens=True,\n",
    "            max_length=length,\n",
    "            truncation_strategy=truncation_strategy)\n",
    "        \n",
    "        input_ids =  inputs[\"input_ids\"]\n",
    "        input_masks = [1] * len(input_ids)\n",
    "        input_segments = inputs[\"token_type_ids\"]\n",
    "        padding_length = length - len(input_ids)\n",
    "        padding_id = tokenizer.pad_token_id\n",
    "        input_ids = input_ids + ([padding_id] * padding_length)\n",
    "        input_masks = input_masks + ([0] * padding_length)\n",
    "        input_segments = input_segments + ([0] * padding_length)\n",
    "        \n",
    "        return [input_ids, input_masks, input_segments]\n",
    "    \n",
    "    input_ids, input_masks, input_segments = return_id(\n",
    "        instance, 'longest_first', max_sequence_length)\n",
    "    \n",
    "    return [input_ids, input_masks, input_segments]\n",
    "\n",
    "def compute_input_arrays(df, columns, tokenizer, max_sequence_length):\n",
    "    input_ids, input_masks, input_segments = [], [], []\n",
    "    for instance in tqdm(df[columns]):\n",
    "        \n",
    "        ids, masks, segments = \\\n",
    "        _convert_to_transformer_inputs(str(instance), tokenizer, max_sequence_length)\n",
    "        \n",
    "        input_ids.append(ids)\n",
    "        input_masks.append(masks)\n",
    "        input_segments.append(segments)\n",
    "\n",
    "    return [np.asarray(input_ids, dtype=np.int32), \n",
    "            np.asarray(input_masks, dtype=np.int32), \n",
    "            np.asarray(input_segments, dtype=np.int32)\n",
    "           ]\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "I0229 15:23:46.032863 140665502107456 tokenization_utils.py:327] Model name './bert-base-chinese-vocab.txt' not found in model shortcut name list (bert-base-uncased, bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, bert-base-multilingual-cased, bert-base-chinese, bert-base-german-cased, bert-large-uncased-whole-word-masking, bert-large-cased-whole-word-masking, bert-large-uncased-whole-word-masking-finetuned-squad, bert-large-cased-whole-word-masking-finetuned-squad, bert-base-cased-finetuned-mrpc, bert-base-german-dbmdz-cased, bert-base-german-dbmdz-uncased, bert-base-finnish-cased-v1, bert-base-finnish-uncased-v1). Assuming './bert-base-chinese-vocab.txt' is a path or url to a directory containing tokenizer files.\n",
      "I0229 15:23:46.033831 140665502107456 tokenization_utils.py:359] Didn't find file ./added_tokens.json. We won't load it.\n",
      "I0229 15:23:46.034261 140665502107456 tokenization_utils.py:359] Didn't find file ./special_tokens_map.json. We won't load it.\n",
      "I0229 15:23:46.034701 140665502107456 tokenization_utils.py:359] Didn't find file ./tokenizer_config.json. We won't load it.\n",
      "I0229 15:23:46.035204 140665502107456 tokenization_utils.py:395] loading file ./bert-base-chinese-vocab.txt\n",
      "I0229 15:23:46.035539 140665502107456 tokenization_utils.py:395] loading file None\n",
      "I0229 15:23:46.035930 140665502107456 tokenization_utils.py:395] loading file None\n",
      "I0229 15:23:46.036242 140665502107456 tokenization_utils.py:395] loading file None\n",
      "100%|██████████| 99913/99913 [00:54<00:00, 1826.01it/s]\n",
      "100%|██████████| 10000/10000 [00:05<00:00, 1908.79it/s]\n"
     ]
    }
   ],
   "source": [
    "tokenizer = BertTokenizer.from_pretrained(BERT_PATH+'bert-base-chinese-vocab.txt')\n",
    "inputs = compute_input_arrays(df_train, input_categories, tokenizer, MAX_SEQUENCE_LENGTH)\n",
    "test_inputs = compute_input_arrays(df_test, input_categories, tokenizer, MAX_SEQUENCE_LENGTH)"
   ]
  },
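  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Illustrative sanity check of the encoding helper (the example sentence is\n",
    "# arbitrary): every sequence is padded/truncated to MAX_SEQUENCE_LENGTH, the\n",
    "# mask is 1 on real tokens and 0 on padding, and the segment ids are all 0\n",
    "# for single-sentence input.\n",
    "ids, masks, segments = _convert_to_transformer_inputs('武汉加油', tokenizer, MAX_SEQUENCE_LENGTH)\n",
    "print(len(ids), len(masks), len(segments))\n",
    "print(ids[:8], masks[:8], segments[:8])"
   ]
  },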
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_output_arrays(df, columns):\n",
    "    return np.asarray(df[columns].astype(int) + 1)\n",
    "outputs = compute_output_arrays(df_train, output_categories)"
   ]
  },
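  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Illustration: to_categorical turns a class index into a one-hot row, which\n",
    "# is the target format the categorical_crossentropy loss below expects.\n",
    "print(outputs[:3])\n",
    "print(to_categorical(outputs[:3]))"
   ]
  },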
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_model():\n",
    "    input_id = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)    \n",
    "    input_mask = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)    \n",
    "    input_atn = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)    \n",
    "    config = BertConfig.from_pretrained(BERT_PATH + 'bert-base-chinese-config.json') \n",
    "    config.output_hidden_states = False \n",
    "    bert_model = TFBertModel.from_pretrained(\n",
    "        BERT_PATH+'bert-base-chinese-tf_model.h5', config=config)\n",
    "    # if config.output_hidden_states = True, obtain hidden states via bert_model(...)[-1]\n",
    "    embedding = bert_model(input_id, attention_mask=input_mask, token_type_ids=input_atn)[0]\n",
    "    \n",
    "    x = tf.keras.layers.GlobalAveragePooling1D()(embedding)    \n",
    "    x = tf.keras.layers.Dropout(0.15)(x)\n",
    "    x = tf.keras.layers.Dense(3, activation='softmax')(x)\n",
    "\n",
    "    model = tf.keras.models.Model(inputs=[input_id, input_mask, input_atn], outputs=x)\n",
    "    \n",
    "    return model\n"
   ]
  },
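  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Variant (a sketch, not used below): pool with the [CLS] vector instead of\n",
    "# averaging over all token positions. Everything else mirrors create_model().\n",
    "def create_model_cls():\n",
    "    input_id = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)\n",
    "    input_mask = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)\n",
    "    input_atn = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH,), dtype=tf.int32)\n",
    "    config = BertConfig.from_pretrained(BERT_PATH + 'bert-base-chinese-config.json')\n",
    "    bert_model = TFBertModel.from_pretrained(BERT_PATH + 'bert-base-chinese-tf_model.h5', config=config)\n",
    "    embedding = bert_model(input_id, attention_mask=input_mask, token_type_ids=input_atn)[0]\n",
    "    x = tf.keras.layers.Lambda(lambda seq: seq[:, 0, :])(embedding)  # take the [CLS] position\n",
    "    x = tf.keras.layers.Dropout(0.15)(x)\n",
    "    x = tf.keras.layers.Dense(3, activation='softmax')(x)\n",
    "    return tf.keras.models.Model(inputs=[input_id, input_mask, input_atn], outputs=x)"
   ]
  },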
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "I0229 16:29:07.124153 140665502107456 configuration_utils.py:182] loading configuration file ./bert-base-chinese-config.json\n",
      "I0229 16:29:07.124994 140665502107456 configuration_utils.py:199] Model config {\n",
      "  \"architectures\": [\n",
      "    \"BertForMaskedLM\"\n",
      "  ],\n",
      "  \"attention_probs_dropout_prob\": 0.1,\n",
      "  \"directionality\": \"bidi\",\n",
      "  \"finetuning_task\": null,\n",
      "  \"hidden_act\": \"gelu\",\n",
      "  \"hidden_dropout_prob\": 0.1,\n",
      "  \"hidden_size\": 768,\n",
      "  \"id2label\": {\n",
      "    \"0\": \"LABEL_0\",\n",
      "    \"1\": \"LABEL_1\"\n",
      "  },\n",
      "  \"initializer_range\": 0.02,\n",
      "  \"intermediate_size\": 3072,\n",
      "  \"is_decoder\": false,\n",
      "  \"label2id\": {\n",
      "    \"LABEL_0\": 0,\n",
      "    \"LABEL_1\": 1\n",
      "  },\n",
      "  \"layer_norm_eps\": 1e-12,\n",
      "  \"max_position_embeddings\": 512,\n",
      "  \"num_attention_heads\": 12,\n",
      "  \"num_hidden_layers\": 12,\n",
      "  \"num_labels\": 2,\n",
      "  \"output_attentions\": false,\n",
      "  \"output_hidden_states\": false,\n",
      "  \"output_past\": true,\n",
      "  \"pooler_fc_size\": 768,\n",
      "  \"pooler_num_attention_heads\": 12,\n",
      "  \"pooler_num_fc_layers\": 3,\n",
      "  \"pooler_size_per_head\": 128,\n",
      "  \"pooler_type\": \"first_token_transform\",\n",
      "  \"pruned_heads\": {},\n",
      "  \"torchscript\": false,\n",
      "  \"type_vocab_size\": 2,\n",
      "  \"use_bfloat16\": false,\n",
      "  \"vocab_size\": 21128\n",
      "}\n",
      "\n",
      "I0229 16:29:07.125755 140665502107456 modeling_tf_utils.py:297] loading weights file ./bert-base-chinese-tf_model.h5\n",
      "I0229 16:29:08.319382 140665502107456 modeling_tf_utils.py:339] Layers from pretrained model not used in TFBertModel: ['mlm___cls', 'nsp___cls']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train on 79929 samples, validate on 19984 samples\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "W0229 16:29:16.700071 140665502107456 optimizer_v2.py:1029] Gradients do not exist for variables ['tf_bert_model/bert/pooler/dense/kernel:0', 'tf_bert_model/bert/pooler/dense/bias:0'] when minimizing the loss.\n",
      "W0229 16:29:23.009484 140665502107456 optimizer_v2.py:1029] Gradients do not exist for variables ['tf_bert_model/bert/pooler/dense/kernel:0', 'tf_bert_model/bert/pooler/dense/bias:0'] when minimizing the loss.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "79929/79929 [==============================] - 775s 10ms/sample - loss: 0.6146 - acc: 0.7308 - mae: 0.2364 - val_loss: 0.5645 - val_acc: 0.7524 - val_mae: 0.2173\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train on 79929 samples, validate on 19984 samples\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "W0229 16:43:56.046372 140665502107456 optimizer_v2.py:1029] Gradients do not exist for variables ['tf_bert_model/bert/pooler/dense/kernel:0', 'tf_bert_model/bert/pooler/dense/bias:0'] when minimizing the loss.\n",
      "W0229 16:44:02.300729 140665502107456 optimizer_v2.py:1029] Gradients do not exist for variables ['tf_bert_model/bert/pooler/dense/kernel:0', 'tf_bert_model/bert/pooler/dense/bias:0'] when minimizing the loss.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "79929/79929 [==============================] - 775s 10ms/sample - loss: 0.6082 - acc: 0.7365 - mae: 0.2329 - val_loss: 0.6147 - val_acc: 0.7119 - val_mae: 0.2428\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train on 79931 samples, validate on 19982 samples\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "W0229 16:58:35.410702 140665502107456 optimizer_v2.py:1029] Gradients do not exist for variables ['tf_bert_model/bert/pooler/dense/kernel:0', 'tf_bert_model/bert/pooler/dense/bias:0'] when minimizing the loss.\n",
      "W0229 16:58:41.762564 140665502107456 optimizer_v2.py:1029] Gradients do not exist for variables ['tf_bert_model/bert/pooler/dense/kernel:0', 'tf_bert_model/bert/pooler/dense/bias:0'] when minimizing the loss.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "79931/79931 [==============================] - 774s 10ms/sample - loss: 0.5869 - acc: 0.7447 - mae: 0.2277 - val_loss: 0.7075 - val_acc: 0.6957 - val_mae: 0.2478\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train on 79931 samples, validate on 19982 samples\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "W0229 17:13:12.855053 140665502107456 optimizer_v2.py:1029] Gradients do not exist for variables ['tf_bert_model/bert/pooler/dense/kernel:0', 'tf_bert_model/bert/pooler/dense/bias:0'] when minimizing the loss.\n",
      "W0229 17:13:20.854532 140665502107456 optimizer_v2.py:1029] Gradients do not exist for variables ['tf_bert_model/bert/pooler/dense/kernel:0', 'tf_bert_model/bert/pooler/dense/bias:0'] when minimizing the loss.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "79931/79931 [==============================] - 773s 10ms/sample - loss: 0.6124 - acc: 0.7333 - mae: 0.2343 - val_loss: 0.5972 - val_acc: 0.7450 - val_mae: 0.2220\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train on 79932 samples, validate on 19981 samples\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "W0229 17:27:53.955852 140665502107456 optimizer_v2.py:1029] Gradients do not exist for variables ['tf_bert_model/bert/pooler/dense/kernel:0', 'tf_bert_model/bert/pooler/dense/bias:0'] when minimizing the loss.\n",
      "W0229 17:28:00.428308 140665502107456 optimizer_v2.py:1029] Gradients do not exist for variables ['tf_bert_model/bert/pooler/dense/kernel:0', 'tf_bert_model/bert/pooler/dense/bias:0'] when minimizing the loss.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "79932/79932 [==============================] - 774s 10ms/sample - loss: 0.6015 - acc: 0.7368 - mae: 0.2307 - val_loss: 0.6143 - val_acc: 0.7271 - val_mae: 0.2349\n"
     ]
    }
   ],
   "source": [
    "gkf = StratifiedKFold(n_splits=5).split(X=df_train[input_categories].fillna('-1'), y=df_train[output_categories].fillna('-1'))\n",
    "\n",
    "valid_preds = []\n",
    "test_preds = []\n",
    "for fold, (train_idx, valid_idx) in enumerate(gkf):\n",
    "    \n",
    "        train_inputs = [inputs[i][train_idx] for i in range(len(inputs))]\n",
    "        train_outputs = to_categorical(outputs[train_idx])\n",
    "\n",
    "        valid_inputs = [inputs[i][valid_idx] for i in range(len(inputs))]\n",
    "        valid_outputs = to_categorical(outputs[valid_idx])\n",
    "        \n",
    "        K.clear_session()\n",
    "        model = create_model()\n",
    "        optimizer = tf.keras.optimizers.Adam(learning_rate=2e-5)\n",
    "        model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['acc', 'mae'])\n",
    "        model.fit(train_inputs, train_outputs, validation_data= [valid_inputs, valid_outputs], epochs=1, batch_size=128)\n",
    "        # model.save_weights(f'bert-{fold}.h5')\n",
    "        valid_preds.append(model.predict(valid_inputs))\n",
    "        test_preds.append(model.predict(test_inputs))"
   ]
  },
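  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sketch of an out-of-fold evaluation. It assumes the loop above is extended\n",
    "# to also record each fold's valid_idx in a list called valid_idxs (not done\n",
    "# here), so the snippet is left commented out.\n",
    "# from sklearn.metrics import f1_score\n",
    "# oof = np.zeros((len(outputs), 3))\n",
    "# for idxs, preds in zip(valid_idxs, valid_preds):\n",
    "#     oof[idxs] = preds\n",
    "# print('OOF macro-F1:', f1_score(outputs, np.argmax(oof, axis=1), average='macro'))"
   ]
  },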
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [],
   "source": [
    "sub = np.average(test_preds, axis=0)\n",
    "sub = np.argmax(sub,axis=1)\n",
    "df_sub['y'] = sub-1\n",
    "df_sub['id'] = df_sub['id'].apply(lambda x: str(x)+' ')\n",
    "df_sub.to_csv('test_sub.csv',index=False, encoding='utf-8')\n",
    "#线上726"
   ]
  },
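  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Quick, illustrative check of the submission just written: eyeball the first\n",
    "# rows and the predicted label distribution before uploading.\n",
    "print(df_sub.head())\n",
    "print(df_sub['y'].value_counts())"
   ]
  },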
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#代码修改自https://www.kaggle.com/akensert的开源"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
